headers in a code groups for now

library(tidyverse)
library(sf)
library(tigris)
library(censusapi)
library(mapview)
library(leaflet)
library(mapboxapi)
library(plotly)
# install.packages("tidytransit") ## be sure to install tidytransit if you want to run transit portions
library(tidytransit)

Sys.setenv(CENSUS_KEY="dbcdeeed2f9e907f31811ee78c1f239a2aa77934")

# mac path to A2 data: /Volumes/GoogleDrive/.shortcut-targets-by-id/1i5yZPOBgJfC_blys_kNUJn1-FfLWoyiO/Assignments/2A/data/ "file"

# mac path to A2 data: /Volumes/GoogleDrive/Shared drives/SFBI/Data Library/OSM/ "file"

## adding 'path_item' for easy retrieval
path_data <-  "/Volumes/GoogleDrive/.shortcut-targets-by-id/1i5yZPOBgJfC_blys_kNUJn1-FfLWoyiO/Assignments/2A/data/" #"G:/My Drive/218Y/Assignments/2A/data/"
path_pois <- "/Volumes/GoogleDrive/Shared drives/SFBI/Data Library/OSM/" #"G:/Shared drives/SFBI/Data Library/OSM/"

POI Maintenance

 pois <- st_read(paste0(path_pois,"gis_osm_pois_a_free_1.shp")) 
pois_filter <- pois %>%
  rename(amenity = fclass) %>%
  filter(amenity %in% c(
    "community_centre",
    "convenience",
    "fast_food",
    "supermarket",
    "park",
    "green grocer", #was not captured in poi data for bay area
    "playground",
    "kindergarten",
    "school",
    "library"#,
    #"tram_stop", was not captured in poi data for bay area
    #"bus_stop" was not captured in poi data for bay area
  ))

pois_filter_summary <- pois_filter %>%
  st_drop_geometry() %>%
  group_by(amenity) %>%
  count() %>%
  arrange(desc(n))

pois_filter_summary

mapview(pois_filter, zcol = "amenity")
# saveRDS(pois_filter, "/Volumes/GoogleDrive/.shortcut-targets-by-id/1i5yZPOBgJfC_blys_kNUJn1-FfLWoyiO/Assignments/2A/data/pois_filter.rds")
pois_filter <- readRDS(paste0(path_data,"pois_filter.rds"))

Area of Interest -

alm_cbgs <- block_groups("CA","Alameda",2020)

Oak_boundary <- places("CA",2020) %>% 
  filter(NAME == "Oakland")

Oak_cbgs <- alm_cbgs %>% 
  st_centroid() %>% 
  .[Oak_boundary, ] %>% 
  st_drop_geometry() %>% 
  left_join(alm_cbgs %>% select(GEOID)) %>% 
  st_as_sf()

 mapview(Oak_cbgs)

WOak_cbgs <- Oak_cbgs[c(87, 86, 88, 95, 159, 103, 102, 38, 41, 40, 39, 34, 169, 106, 105, 104, 101,
                        288, 290, 289, 37, 36, 35),]   #pulled from map of block groups

alm_blk <- blocks("CA","Alameda",2020)

WOak_blk <- alm_blk %>% 
  st_centroid() %>% 
  .[WOak_cbgs, ] %>% 
  st_drop_geometry() %>% 
  left_join(alm_blk %>% select(GEOID20)) %>% 
  st_as_sf()

#saveRDS(WOak_blk, "/Volumes/GoogleDrive/.shortcut-targets-by-id/1i5yZPOBgJfC_blys_kNUJn1-FfLWoyiO/Assignments/2A/data/WestOaklandBlocks.rds")
WOak_blk <- readRDS(paste0(path_data,"WestOaklandBlocks.rds"))
isochrones <- c("walking","cycling","driving") %>%
  map_dfr(function(x){
    mb_isochrone(
      WOak_blk,
      profile = x,
      time = c(5,10,15)
    ) %>%
      mutate(mode = x)
})
#saveRDS(isochrones, "/Volumes/GoogleDrive/.shortcut-targets-by-id/1i5yZPOBgJfC_blys_kNUJn1-FfLWoyiO/Assignments/2A/data/WOak_isochrones.rds")
isochrones <- readRDS(paste0(path_data,"WOak_isochrones.rds"))
mapview(WOak_blk)
mapview(isochrones %>% filter(mode == "walking"))
mapview(pois_filter, zcol = "amenity")

transit data

# grabbing GTFS data; can't find the zip file to line so I just downloaded the file and added it to our Gdrive data
gtfs <- read_gtfs(paste0(path_data,"gtfs.zip"))

# only including walking and biking isochrones bc people most likely aren't driving to a bus stop.
isochrones_walking <- isochrones %>% 
  filter(mode == "walking")

isochrones_cycling <- isochrones %>% 
  filter(mode == "cycling")

# grabbing all the stops
stops_all <- gtfs$stops %>% 
  st_as_sf(coords = c("stop_lon", 
                      "stop_lat"), crs = 4326)
# visualize the stops
mapview(stops_all)


# spatially filter for all stops within walking isochrones
stops_walk <- stops_all %>% 
  .[isochrones_walking,] %>% 
  select(c(stop_id, stop_name,geometry)) %>% 
  mutate(mode = "walking") %>% 
  st_as_sf()

## stops within cycling isochrones if we wanted to include
# stops_cycle <- stops_all %>% 
#   .[isochrones_cycling,]

# visualize stops within walking and cycling distance
mapview(stops_walk)
mapview(stops_cycle)

stops_walk <- saveRDS(stops_walk, paste0(path_data,"stops_walk.rds")) # having trouble saving this

## attempt to add stops to POI

stops_poi <- stops_walk %>%
  transmute(osm_id = stop_id, code = NA, amenity = "transit_stop", name=stop_name, geometry=geometry)

full_pois <- rbind(pois_filter,stops_poi)

saveRDS(full_pois, paste0(path_data,"full_pois.rds"))
full_pois <- readRDS(paste0(path_data,"full_pois.rds"))

## don't need the section below; starts to get at including transit rides for our isochrones

# isochrones_firstmile <- isochrones_walking %>% 
#   st_make_valid() %>% 
#   st_join(stops_walk) %>% 
#   st_drop_geometry() %>% 
#   group_by(stop_id) %>% 
#   arrange(time) %>% 
#   filter(!duplicated(stop_id)) %>% 
#   select(
#     firstmile_time = time,
#     id,
#     from_stop_id = stop_id
#   )
#saveRDS(full_pois, paste0(path_data,"full_pois.rds"))
full_pois <- readRDS(paste0(path_data,"full_pois.rds"))
sf::sf_use_s2(FALSE)
access_raw <- isochrones %>%
  st_make_valid() %>%
  st_join(full_pois) %>%
  st_drop_geometry()

access_raw <- access_raw %>%
  filter(!is.na(osm_id)) %>%
  
  # removing all cycling and driving transit stops
  filter(
    ! ( (mode %in% c("cycling","driving")) & (amenity %in% c("transit_stop")) ) 
         ) %>%
  
  #adding super market tag, if 1 is supermarket
  mutate(
    isSM_walk = case_when(
      (mode %in% c("walking")) & (amenity %in% c("supermarket")) ~ 1,
      TRUE ~ 0
    ),
    isSM_cycle = case_when(
      (mode %in% c("cycling")) & (amenity %in% c("supermarket")) ~ 1,
      TRUE ~ 0
    ),
    isSM_drive = case_when(
      (mode %in% c("driving")) & (amenity %in% c("supermarket")) ~ 1,
      TRUE ~ 0
    )
  )
  


saveRDS(access_raw, paste0(path_data,"access_raw.rds"))
access_raw <- readRDS(paste0(path_data, "access_raw.rds"))
#loading amenity pref. data from CSV file
amenity_preference <- read.csv(paste0(path_data,"2A - amenity_preference.csv")) %>% 
  select(-amenity_decay) %>% 
  mutate(
    amenity_decay = -log(.5)/amenity_quantity
  )

# loading mode pref from CSV file
mode_preference <- read.csv(paste0(path_data,"2A - mode_preference.csv")) %>% 
  select(-mode_decay) %>% 
  mutate(
    mode_decay = -log(.5)/mode_reasonable
  )

Scoring West Oakland

# baseline score for West Oakland
complete_baseline <- data.frame(
  amenity = amenity_preference$amenity %>% 
    rep(amenity_preference$amenity_quantity)
) %>%
  left_join(
    amenity_preference,
    by = "amenity"
  ) %>%
  group_by(amenity) %>% 
  mutate(
    amenity_rank = row_number()-1
  ) %>% 
  ungroup() %>% 
  mutate(
    score = amenity_value * exp(-amenity_rank * amenity_decay) * .5
  )

sum(complete_baseline$score)
## [1] 7.952573
complete_temp <- access_raw %>% 
  left_join(
    amenity_preference,
    by = 'amenity'
  ) %>% 
  left_join(
    mode_preference,
    by = 'mode'
  ) %>% 
  group_by(id, mode, amenity) %>% 
  arrange(time) %>% 
  mutate(
    amenity_rank = row_number()-1
  ) %>% 
  ungroup()
# move crit amenity tag to end for legibility
complete_temp <- complete_temp[, c(1,2,3,4,5,6,7,11,12,13,14,15,16,17,8,9,10)]


# grabbing complete modes bc some NAs
complete_modes_ungrouped <- complete_temp %>% 
  mutate(
    score = amenity_value * exp(-amenity_rank * amenity_decay) * exp(-time*mode_decay) * mode_value
  ) 

# adding crit amenity flag, if != 0 then there is an accessible supermarket
crit_amenity <- complete_modes_ungrouped %>%
  group_by(id) %>%
  summarize(
    SM_walk = sum(isSM_walk, na.rm = T),
    SM_cycle = sum(isSM_cycle, na.rm = T),
    SM_drive = sum(isSM_drive, na.rm = T)
  )
  

# summing mode scores
complete_modes <- complete_modes_ungrouped %>%
  group_by(id, mode) %>%
  arrange(desc(score)) %>%
  filter(!duplicated(osm_id)) %>%
  summarize(
    score = sum(score, na.rm = T)/sum(complete_baseline$score)
  )
complete_total <- complete_temp %>% 
  mutate(
    score = amenity_value * exp(-amenity_rank * amenity_decay) *mode_value * exp(-time*mode_decay)
  ) %>% 
  group_by(id) %>% 
  arrange(desc(score)) %>% 
  filter(!duplicated(osm_id)) %>% 
  summarise(
    score = sum(score, na.rm = T)/sum(complete_baseline$score)
  ) %>% 
  mutate(mode = "total")
complete <- rbind(
  complete_modes,
  complete_total
) 

# created formatted score for hover tool with super market flag (if there is a super market there is a star)
complete_format <- complete %>% 
  pivot_wider(
    names_from = "mode",
    values_from = "score"
  ) %>%
  cbind(select(crit_amenity, !id)) %>%
  mutate(
    `Walking Score` = case_when(
      SM_walk > 0 ~ paste0(round(walking,2),"*"),
      SM_walk == 0 ~ as.character(round(walking,2))
    ),
    `Cycling Score` = case_when(
      SM_cycle > 0 ~ paste0(round(cycling,2),"*"),
      SM_cycle == 0 ~ as.character(round(cycling,2))
    ),
    `Driving Score` = case_when(
      SM_drive > 0 ~ paste0(round(driving,2),"*"),
      SM_drive == 0 ~ as.character(round(driving,2))
    ),
    `Total Score` = case_when(
      SM_drive > 0 ~ paste0(round(driving,2),"*"), # only need to check drive b/c largest area
      SM_drive == 0 ~ as.character(round(driving,2))
    )
  ) 

complete_map <- complete %>% 
  pivot_wider(
    names_from = "mode",
    values_from = "score"
  ) %>% 
  cbind(WOak_blk %>% 
          select(GEOID20)) %>% 
  st_as_sf()

mapview(complete_map, zcol = "walking")

Equity analysis

walking_quartile <- quantile(complete_map$walking)
cycling_quartile <- quantile(complete_map$cycling)
driving_quartile <- quantile(complete_map$driving)
total_quartile <- quantile(complete_map$total)


complete_quart <-
  complete_map %>%
  
  st_drop_geometry()%>%
  
  mutate(
    walking_q = case_when(
      walking <= walking_quartile[2] ~ "1 - Worst",
      walking <= walking_quartile[3] ~ "2",
      walking <= walking_quartile[4] ~ "3",
      walking <= walking_quartile[5] ~ "4 - Best"
    ),
    cycling_q = case_when(
      cycling <= cycling_quartile[2] ~ "1 - Worst",
      cycling <= cycling_quartile[3] ~ "2",
      cycling <= cycling_quartile[4] ~ "3",
      cycling <= cycling_quartile[5] ~ "4 - Best"
    ),
    driving_q = case_when(
      driving <= driving_quartile[2] ~ "1 - Worst",
      driving <= driving_quartile[3] ~ "2",
      driving <= driving_quartile[4] ~ "3",
      driving <= driving_quartile[5] ~ "4 - Best"
    ),
    total_q = case_when(
      total <= total_quartile[2] ~ "1 - Worst",
      total <= total_quartile[3] ~ "2",
      total <= total_quartile[4] ~ "3",
      total <= total_quartile[5] ~ "4 - Best"
    )
  ) %>%
  
  select(ends_with("_q"),GEOID20)
# getting associated census variables data
dec_vars_2020 <-
  listCensusMetadata(
    name = "2020/dec/pl",
    type = "variables"
  )

#saveRDS(dec_vars_2020, paste0(path_data,"dec_vars_2020.rds"))
dec_vars_2020 <- readRDS(paste0(path_data, "dec_vars_2020.rds"))

race_categories <- c(
  "White alone",
  "Black or African American alone",
  "American Indian and Alaska Native alone",
  "Asian alone",
  "Native Hawaiian and Other Pacific Islander alone",
  "Some Other Race alone",
  "Two or more Races"
)

# pulling decenial data for race of alameda county blocks and cleaning
alm_pop_race_2020 <-
  getCensus(
    name = "dec/pl", vintage = 2020, region = "block:*", 
    regionin = "state:06+county:001", vars = "group(P1)") %>% 
  
  mutate(
    block = paste0(state,county,tract,block)) %>% 
  
  select(
    !c(GEO_ID,state,county,tract,NAME) & !ends_with(c("NA"))) %>% 
  
  pivot_longer(
    ends_with("N"), names_to = "name", values_to = "estimate") %>%
  
  left_join(
    dec_vars_2020 %>% 
      select(name, label)) %>% 
  
  select(-name) %>% 
  
  separate(
    label, into = c(NA,NA,"category1","category2"), sep = "!!") %>% 
  
  mutate(
    race = case_when(
      category1 == "Population of two or more races:" & is.na(category2) ~ "Two or more races",
      category1 == "Population of two or more races:" ~ "",
      !is.na(category2) ~ category2,
      TRUE ~ ""
    ))%>% 
  
  filter(race != "") %>% 
  
  select(GEOID20 = block, race, pop20 = estimate) 
dec_vars_2020 <- readRDS(paste0(path_data, "dec_vars_2020.rds"))

#saveRDS(alm_pop_race_2020, paste0(path_data,"alm_pop_race_2020.rds"))
alm_pop_race_2020 <- readRDS(paste0(path_data, "alm_pop_race_2020.rds"))
# combining scores with race data for blocks
equity_walking <- alm_pop_race_2020 %>%
  
  left_join(complete_quart %>% select(GEOID20, walking_q), by = "GEOID20") %>%
    
  filter(!is.na(walking_q)) %>%
  
  group_by(race, walking_q) %>%
  summarise(
    estimated_pop = sum(pop20)
  )
  

# re-adding values for population distribution for comparison
WO_race_total <-
  equity_walking %>% 
  group_by(race) %>% 
  summarize(estimated_pop = sum(estimated_pop)) %>% 
  mutate(walking_q = "Total")

# making equity plot
equity_walking_plot_gg <- 
  equity_walking %>% 
  rbind(WO_race_total) %>% 
  ggplot() +
  geom_bar(
    aes(
      x = walking_q %>% factor(levels = c(unique(equity_walking$walking_q),"Total")),
      y = estimated_pop,
      fill = race %>% factor(levels = unique(equity_walking$race)),
      text =  estimated_pop/sum(estimated_pop)
    ),
    stat = "identity",
    position = "fill"
  ) +
  labs(
    x = "Quartile of Walking Score",
    y = "Proportion of Population ",
    title = "West Oakland Walking Accessibility Equity Analysis",
    fill = "Race"
  ) +
  coord_flip() +
  theme(
    legend.position = "bottom",
    legend.direction = "vertical"
  )  +
  guides(
    fill = guide_legend(
    )
  )

equity_walking_plot <- ggplotly(equity_walking_plot_gg, tooltip = "y")

equity_walking_plot
# combining scores with race data for blocks
equity_cycling <- alm_pop_race_2020 %>%
  
  left_join(complete_quart %>% select(GEOID20, cycling_q), by = "GEOID20") %>%
    
  filter(!is.na(cycling_q)) %>%
  
  group_by(race, cycling_q) %>%
  summarise(
    estimated_pop = sum(pop20)
  )
  
# re-adding values for population distribution for comparison
WO_race_total <-
  equity_cycling %>% 
  group_by(race) %>% 
  summarize(estimated_pop = sum(estimated_pop)) %>% 
  mutate(cycling_q = "Total")

# making equity plot
equity_cycling_plot_gg <- 
  equity_cycling %>% 
  rbind(WO_race_total) %>% 
  ggplot() +
  geom_bar(
    aes(
      x = cycling_q %>% factor(levels = c(unique(equity_cycling$cycling_q),"Total")),
      y = estimated_pop,
      fill = race %>% factor(levels = unique(equity_cycling$race))
    ),
    stat = "identity",
    position = "fill"
  ) +
  labs(
    x = "Quartile of Cycling Score",
    y = "Proportion of Population ",
    title = "West Oakland Cycling Accessibility Equity Analysis",
    fill = "Race"
  ) +
  coord_flip() +
  theme(
    legend.position = "bottom",
    legend.direction = "vertical"
  )  +
  guides(
    fill = guide_legend(
    )
  )

equity_cycling_plot <- ggplotly(equity_cycling_plot_gg, tooltip = "y")

equity_cycling_plot 
# combining scores with race data for blocks
equity_driving <- alm_pop_race_2020 %>%
  
  left_join(complete_quart %>% select(GEOID20, driving_q), by = "GEOID20") %>%
    
  filter(!is.na(driving_q)) %>%
  
  group_by(race, driving_q) %>%
  summarise(
    estimated_pop = sum(pop20)
  )
  
# re-adding values for population distribution for comparison
WO_race_total <-
  equity_driving %>% 
  group_by(race) %>% 
  summarize(estimated_pop = sum(estimated_pop)) %>% 
  mutate(driving_q = "Total")

# making equity plot
equity_driving_plot_gg <- 
  equity_driving %>% 
  rbind(WO_race_total) %>% 
  ggplot() +
  geom_bar(
    aes(
      x = driving_q %>% factor(levels = c(unique(equity_driving$driving_q),"Total")),
      y = estimated_pop,
      fill = race %>% factor(levels = unique(equity_driving$race))
    ),
    stat = "identity",
    position = "fill"
  ) +
  labs(
    x = "Quartile of Driving Score",
    y = "Proportion of Population ",
    title = "West Oakland Driving Accessibility Equity Analysis",
    fill = "Race"
  ) +
  coord_flip() +
  theme(
    legend.position = "bottom",
    legend.direction = "vertical"
  )  +
  guides(
    fill = guide_legend(
    )
  )

equity_driving_plot <- ggplotly(equity_driving_plot_gg, tooltip = "y")

equity_driving_plot